<!DOCTYPE html>



  


<html class="theme-next mist use-motion" lang="zh-Hans">
<head>
  <meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />
















  
  
  <link href="/jexo/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />







<link href="/jexo/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />

<link href="/jexo/css/main.css?v=5.1.4" rel="stylesheet" type="text/css" />


  <link rel="apple-touch-icon" sizes="180x180" href="/jexo/images/apple-touch-icon-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="32x32" href="/jexo/images/favicon-32x32-next.png?v=5.1.4">


  <link rel="icon" type="image/png" sizes="16x16" href="/jexo/images/favicon-16x16-next.png?v=5.1.4">


  <link rel="mask-icon" href="/jexo/images/logo.svg?v=5.1.4" color="#222">





  <meta name="keywords" content="Pythone,爬虫," />










<meta name="description" content="categories: Python爬虫 库urllib2模拟http请求获取html BeautifulSoup根据选择器获取dom结点,可查看css选择器">
<meta name="keywords" content="Pythone,爬虫">
<meta property="og:type" content="article">
<meta property="og:title" content="抓取起点免费小说">
<meta property="og:url" content="http://ja_project.gitee.io/jexo/2018/04/18/抓取起点免费小说/index.html">
<meta property="og:site_name" content="Ling Hu Blog">
<meta property="og:description" content="categories: Python爬虫 库urllib2模拟http请求获取html BeautifulSoup根据选择器获取dom结点,可查看css选择器">
<meta property="og:locale" content="zh-Hans">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-86af1d8f6570d727.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-e551b8b5bf060e23.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-3fbcc454ab7df54a.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-3771351f0906522b.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-b099a0e5f2cbf4e5.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-0656646646b1cf0a.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-a02cd1ee84511b98.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-48b000c84ef29a25.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/4241874-e21239f47893356c.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:updated_time" content="2018-04-18T13:34:00.000Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="抓取起点免费小说">
<meta name="twitter:description" content="categories: Python爬虫 库urllib2模拟http请求获取html BeautifulSoup根据选择器获取dom结点,可查看css选择器">
<meta name="twitter:image" content="http://upload-images.jianshu.io/upload_images/4241874-86af1d8f6570d727.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">



<script type="text/javascript" id="hexo.configurations">
  // Reuse an already-defined NexT global when there is one; otherwise start from an empty object.
  var NexT = window.NexT || {};

  // Page-level settings: site root, scheme/version, sidebar and motion options,
  // plus the duoshuo and algolia sections.
  // NOTE(review): presumably consumed by the theme scripts loaded later in the page — confirm.
  var CONFIG = {
    root: '/jexo/',
    scheme: 'Mist',
    version: '5.1.4',
    sidebar: {
      position: "left",
      display: "post",
      offset: 12,
      b2t: false,
      scrollpercent: false,
      onmobile: false
    },
    fancybox: true,
    tabs: true,
    motion: {
      enable: true,
      async: false,
      transition: {
        post_block: "fadeIn",
        post_header: "slideDownIn",
        post_body: "slideDownIn",
        coll_header: "slideLeftIn",
        sidebar: "slideUpIn"
      }
    },
    duoshuo: {
      userId: '0',
      author: '博主'
    },
    // The three algolia credentials below are empty strings in this build.
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {
        per_page: 10
      },
      // ${query}, ${hits} and ${time} are literal placeholder text here (ordinary strings, not template literals).
      labels: {
        input_placeholder: "Search for Posts",
        hits_empty: "We didn't find any results for the search: ${query}",
        hits_stats: "${hits} results found in ${time} ms"
      }
    }
  };
</script>



  <link rel="canonical" href="http://ja_project.gitee.io/jexo/2018/04/18/抓取起点免费小说/"/>





  <title>抓取起点免费小说 | Ling Hu Blog</title>
  





  <script type="text/javascript">
    // Keep an existing _hmt value if one is already defined; otherwise start with an empty array.
    var _hmt = _hmt || [];

    // Create a <script> element pointing at hm.baidu.com and insert it
    // immediately before the first <script> element found in the document.
    (function() {
      var firstScript = document.getElementsByTagName("script")[0];
      var tracker = document.createElement("script");
      tracker.src = "https://hm.baidu.com/hm.js?abc8796c232e2b898d516245257842b4";
      firstScript.parentNode.insertBefore(tracker, firstScript);
    })();
  </script>




</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/jexo/"  class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">Ling Hu Blog</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <p class="site-subtitle"></p>
      
  </div>

  <div class="site-nav-toggle">
    <button>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/jexo/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br />
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-about">
          <a href="/jexo/about/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-user"></i> <br />
            
            关于
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/jexo/tags/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
            
            标签
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/jexo/categories/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br />
            
            分类
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/jexo/archives/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
            
            归档
          </a>
        </li>
      

      
    </ul>
  

  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://ja_project.gitee.io/jexo/2018/04/18/抓取起点免费小说/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="Ling Hu">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/jexo/images/avatar.jpg">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Ling Hu Blog">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">抓取起点免费小说</h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2018-04-18T21:30:28+08:00">
                2018-04-18
              </time>
            

            

            
          </span>

          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/jexo/2018/04/18/抓取起点免费小说/#comments" itemprop="discussionUrl">
                  <span class="post-comments-count gitment-comments-count" data-xid="/jexo/2018/04/18/抓取起点免费小说/" itemprop="commentsCount"></span>
                </a>
              </span>
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <p>categories: Python爬虫</p>
<h3 id="库"><a href="#库" class="headerlink" title="库"></a>库</h3><h5 id="urllib2"><a href="#urllib2" class="headerlink" title="urllib2"></a>urllib2</h5><pre><code>模拟http请求获取html
</code></pre><h5 id="BeautifulSoup"><a href="#BeautifulSoup" class="headerlink" title="BeautifulSoup"></a>BeautifulSoup</h5><pre><code>根据选择器获取dom结点,可查看css选择器
</code></pre><a id="more"></a>
<h3 id="抓取逻辑"><a href="#抓取逻辑" class="headerlink" title="抓取逻辑"></a>抓取逻辑</h3><ol>
<li><p>查看起点免费小说列表<br> <a href="https://www.qidian.com/free/all" target="_blank" rel="noopener">https://www.qidian.com/free/all</a></p>
</li>
<li><p>先搞懂一本书的抓取逻辑</p>
<p>  <em>2.1 根据选择器获取到书的链接和书名</em></p>
<p><img src="http://upload-images.jianshu.io/upload_images/4241874-9b2b36f225de4fb7.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="1.png"></p>
 <figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">bookCover = book.select(<span class="string">"div[class='book-mid-info'] h4 &gt; a"</span>)[<span class="number">0</span>]</span><br></pre></td></tr></table></figure>
<p>利用css选择器，直接定位到我们需要的div。</p>
<p>  <em>2.2 创建并打开文件</em></p>
  <figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">   bookFile = open(<span class="string">"crawler/books/"</span> + bookCover.string + <span class="string">".txt"</span>,</span><br><span class="line"><span class="string">"a+"</span>)</span><br></pre></td></tr></table></figure>
<p>   使用“a+”模式打开，如果不存在就创建这个文件，如果存在，就追加内容。创建的txt文件名也就是抓取到的dom结点的text。</p>
<p>   <em>2.3 跳转到正文内容</em><br>  先获取到<code>&quot;div[class=&#39;book-mid-info&#39;] h4 &gt; a&quot;</code> 这个结点的<code>href</code>地址，然后获取到返回内容，如下图<br> <img src="http://upload-images.jianshu.io/upload_images/4241874-86af1d8f6570d727.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="4.png"></p>
<p>再获取到<code>免费试读</code>这个结点的<code>href</code>，再获取它的返回内容</p>
<p> <em>2.4 递归获取到每一章的内容，写入文件</em></p>
<p><img src="http://upload-images.jianshu.io/upload_images/4241874-e551b8b5bf060e23.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="5.png"><br><img src="http://upload-images.jianshu.io/upload_images/4241874-3fbcc454ab7df54a.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="6.png"></p>
<p> 通过class获取到结点内容，然后再获取到<code>下一章</code>的<code>href</code>然后递归获取每章内容。</p>
<p><img src="http://upload-images.jianshu.io/upload_images/4241874-3771351f0906522b.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="7.png"></p>
<p> 如果没有<code>下一章</code>而是<code>书末页</code>就说明已经是最后一章了，递归结束，一本书的内容也就获取完毕了。</p>
</li>
<li><p>循环获取当前页的每本书内容</p>
<p><img src="http://upload-images.jianshu.io/upload_images/4241874-b099a0e5f2cbf4e5.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="2.png"><br>每本书其实都是一个<code>li</code>标签，先获取到所有的<code>li</code>然后按照第二步进行遍历。</p>
</li>
<li><p>循环获取所有页面的书</p>
<p><img src="http://upload-images.jianshu.io/upload_images/4241874-0656646646b1cf0a.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="3.png"><br>当当前页面所有的书本都抓取完毕了，那么我们可以获取下<code>&gt;</code>对应的<code>href</code>然后获取到返回内容，继续循环抓取。</p>
<p><img src="http://upload-images.jianshu.io/upload_images/4241874-a02cd1ee84511b98.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="3.1.png"><br>直到抓取到最后一页,<code>&gt;</code>这个dom结点的<code>class</code>会增加一个为<code>lbf-pagination-disabled</code>,可以根据这个来判断是否为最后一页。</p>
</li>
</ol>
<h3 id="成品展示"><a href="#成品展示" class="headerlink" title="成品展示"></a>成品展示</h3><p>   <img src="http://upload-images.jianshu.io/upload_images/4241874-48b000c84ef29a25.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="8.png"></p>
<p><img src="http://upload-images.jianshu.io/upload_images/4241874-e21239f47893356c.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="9.png"></p>
<h3 id="完整代码"><a href="#完整代码" class="headerlink" title="完整代码"></a>完整代码</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span 
class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># coding=utf-8</span></span><br><span class="line"><span class="keyword">import</span> urllib2</span><br><span class="line"><span class="keyword">import</span> sys</span><br><span class="line"><span class="keyword">from</span> bs4 <span class="keyword">import</span> BeautifulSoup</span><br><span class="line"></span><br><span class="line"><span class="comment">#设置编码</span></span><br><span class="line">reload(sys)</span><br><span class="line">sys.setdefaultencoding(<span class="string">'utf-8'</span>)</span><br><span class="line"></span><br><span class="line">startIndex = <span class="number">0</span> <span class="comment">#默认第0本</span></span><br><span class="line">startPage = <span class="number">0</span> <span class="comment">#默认第0页</span></span><br><span class="line"></span><br><span class="line"><span class="comment">#获取一个章节的内容</span></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">getChapterContent</span><span class="params">(file,url)</span>:</span></span><br><span class="line">    <span class="keyword">try</span>:</span><br><span class="line">        bookContentRes = urllib2.urlopen(url)</span><br><span class="line">        bookContentSoup = BeautifulSoup(bookContentRes.read(), <span class="string">"html.parser"</span>)</span><br><span class="line">        file.write(bookContentSoup.select(<span class="string">"h3[class='j_chapterName']"</span>)[<span class="number">0</span>].string + <span 
class="string">'\n'</span>)</span><br><span class="line">        <span class="keyword">for</span> p <span class="keyword">in</span> bookContentSoup.select(<span class="string">".j_readContent p"</span>):</span><br><span class="line">            file.write(p.next + <span class="string">'\n'</span>)</span><br><span class="line">    <span class="keyword">except</span> BaseException:</span><br><span class="line">        <span class="comment">#如果出错了，就重新运行一遍</span></span><br><span class="line">        print(BaseException.message)</span><br><span class="line">        getChapterContent(file, url)</span><br><span class="line">    <span class="keyword">else</span>:</span><br><span class="line">        chapterNext = bookContentSoup.select(<span class="string">"a#j_chapterNext"</span>)[<span class="number">0</span>]</span><br><span class="line">        <span class="keyword">if</span> chapterNext.string != <span class="string">"书末页"</span>:</span><br><span class="line">            nextUrl = <span class="string">"https:"</span> + chapterNext[<span class="string">"href"</span>]</span><br><span class="line">            getChapterContent(file,nextUrl)</span><br><span class="line"></span><br><span class="line"><span class="comment">#获取当前页所有书的内容</span></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">getCurrentUrlBooks</span><span class="params">(url)</span>:</span></span><br><span class="line">    response = urllib2.urlopen(url)</span><br><span class="line">    the_page = response.read()</span><br><span class="line">    soup = BeautifulSoup(the_page, <span class="string">"html.parser"</span>)</span><br><span class="line">    bookArr = soup.select(<span class="string">"ul[class='all-img-list cf'] &gt; li"</span>)</span><br><span class="line">    <span class="keyword">global</span> startIndex</span><br><span class="line">    <span class="keyword">if</span> startIndex &gt; <span class="number">0</span>:</span><br><span 
class="line">        bookArr = bookArr[startIndex:]</span><br><span class="line">        startIndex = <span class="number">0</span></span><br><span class="line">    <span class="keyword">for</span> book <span class="keyword">in</span> bookArr:</span><br><span class="line">        bookCover = book.select(<span class="string">"div[class='book-mid-info'] h4 &gt; a"</span>)[<span class="number">0</span>]</span><br><span class="line">        <span class="keyword">print</span> <span class="string">"书名："</span> + bookCover.string</span><br><span class="line">        <span class="comment"># 先创建.txt文件，然后获取文本内容写入</span></span><br><span class="line">        bookFile = open(<span class="string">"crawler/books/"</span> + bookCover.string + <span class="string">".txt"</span>, <span class="string">"a+"</span>)</span><br><span class="line">        bRes = urllib2.urlopen(<span class="string">"https:"</span> + bookCover[<span class="string">'href'</span>])</span><br><span class="line">        bSoup = BeautifulSoup(bRes.read(), <span class="string">"html.parser"</span>)</span><br><span class="line">        bookContentHref = bSoup.select(<span class="string">"a[class='red-btn J-getJumpUrl ']"</span>)[<span class="number">0</span>][<span class="string">"href"</span>]</span><br><span class="line">        getChapterContent(bookFile, <span class="string">"https:"</span> + bookContentHref)</span><br><span class="line">        bookFile.close()</span><br><span class="line">    nextPage = soup.select(<span class="string">"a.lbf-pagination-next"</span>)[<span class="number">0</span>]</span><br><span class="line">    <span class="keyword">return</span> nextPage[<span class="string">"href"</span>]</span><br><span class="line"></span><br><span class="line"><span class="keyword">if</span> len(sys.argv)==<span class="number">1</span>:</span><br><span class="line">    <span class="keyword">pass</span></span><br><span class="line"><span class="keyword">elif</span> len(sys.argv) == <span 
class="number">2</span>:</span><br><span class="line">    startPage = int(sys.argv[<span class="number">1</span>])/<span class="number">20</span> <span class="comment">#从第几页开始下载</span></span><br><span class="line">    startIndex = int(sys.argv[<span class="number">1</span>])%<span class="number">20</span>  <span class="comment"># 从第几本开始下载</span></span><br><span class="line"><span class="keyword">elif</span> len(sys.argv) &gt; <span class="number">2</span>:</span><br><span class="line">    startPage = int(sys.argv[<span class="number">1</span>])</span><br><span class="line">    startIndex = int(sys.argv[<span class="number">2</span>])</span><br><span class="line"></span><br><span class="line"><span class="comment">#根据传入参数设置从哪里开始下载</span></span><br><span class="line">url = <span class="string">"//www.qidian.com/free/all?orderId=&amp;vip=hidden&amp;style=1&amp;pageSize=20&amp;siteid=1&amp;pubflag=0&amp;hiddenField=1&amp;page="</span>+str(startPage+<span class="number">1</span>)</span><br><span class="line"></span><br><span class="line"><span class="comment">#死循环 直到没有下一页</span></span><br><span class="line"><span class="keyword">while</span> <span class="keyword">True</span>:</span><br><span class="line">    <span class="keyword">if</span> url.startswith(<span class="string">"//"</span>):</span><br><span class="line">        url = getCurrentUrlBooks(<span class="string">"https:"</span> + url)</span><br><span class="line">    <span class="keyword">else</span>:</span><br><span class="line">        <span class="keyword">break</span>;</span><br></pre></td></tr></table></figure>

      
    </div>
    
    
    

    

    
      <div>
        <div style="padding: 10px 0; margin: 20px auto; width: 90%; text-align: center;">
  <div>Donate comment here</div>
  <!-- Clicking this button toggles the #QR panel: shown (display: block) when it is currently display: none, hidden otherwise. -->
  <button id="rewardButton" type="button" onclick="var qr = document.getElementById('QR'); qr.style.display = (qr.style.display === 'none') ? 'block' : 'none';">
    <span>打赏</span>
  </button>
  <div id="QR" style="display: none;">

    
      <div id="wechat" style="display: inline-block">
        <img id="wechat_qr" src="/jexo/images/wechatpay.png" alt="Ling Hu 微信支付"/>
        <p>微信支付</p>
      </div>
    

    
      <div id="alipay" style="display: inline-block">
        <img id="alipay_qr" src="/jexo/images/alipay.png" alt="Ling Hu 支付宝"/>
        <p>支付宝</p>
      </div>
    

    

  </div>
</div>

      </div>
    

    

    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/jexo/tags/Pythone/" rel="tag"># Pythone</a>
          
            <a href="/jexo/tags/爬虫/" rel="tag"># 爬虫</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/jexo/2018/04/18/AndroidApp启动流程/" rel="next" title="Android App 启动流程">
                <i class="fa fa-chevron-left"></i> Android App 启动流程
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/jexo/2018/04/18/Android 自定义v7 AlertDialog样式/" rel="prev" title="Android 自定义v7 AlertDialog样式">
                Android 自定义v7 AlertDialog样式 <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          


          

  
    <div class="comments" id="comments">
      
        <div id="gitment-container"></div>
      
    </div>

  



        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview-wrap">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview-wrap sidebar-panel">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <img class="site-author-image" itemprop="image"
                src="/jexo/images/avatar.jpg"
                alt="Ling Hu" />
            
              <p class="site-author-name" itemprop="name">Ling Hu</p>
              <p class="site-description motion-element" itemprop="description">Android,源码解析,每天进步一点点</p>
          </div>

          <nav class="site-state motion-element">

            
              <div class="site-state-item site-state-posts">
              
                <a href="/jexo/archives/">
              
                  <span class="site-state-item-count">19</span>
                  <span class="site-state-item-name">日志</span>
                </a>
              </div>
            

            
              
              
              <div class="site-state-item site-state-categories">
                <a href="/jexo/categories/index.html">
                  <span class="site-state-item-count">9</span>
                  <span class="site-state-item-name">分类</span>
                </a>
              </div>
            

            
              
              
              <div class="site-state-item site-state-tags">
                <a href="/jexo/tags/index.html">
                  <span class="site-state-item-count">14</span>
                  <span class="site-state-item-name">标签</span>
                </a>
              </div>
            

          </nav>

          

          
            <div class="links-of-author motion-element">
                
                  <span class="links-of-author-item">
                    <a href="https://github.com/javalong" target="_blank" title="GitHub">
                      
                        <i class="fa fa-fw fa-github"></i>GitHub</a>
                  </span>
                
            </div>
          

          
          

          
          

          

        </div>
      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-3"><a class="nav-link" href="#库"><span class="nav-number">1.</span> <span class="nav-text">库</span></a><ol class="nav-child"><li class="nav-item nav-level-5"><a class="nav-link" href="#urllib2"><span class="nav-number">1.0.1.</span> <span class="nav-text">urllib2</span></a></li><li class="nav-item nav-level-5"><a class="nav-link" href="#BeautifulSoup"><span class="nav-number">1.0.2.</span> <span class="nav-text">BeautifulSoup</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#抓取逻辑"><span class="nav-number">2.</span> <span class="nav-text">抓取逻辑</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#成品展示"><span class="nav-number">3.</span> <span class="nav-text">成品展示</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#完整代码"><span class="nav-number">4.</span> <span class="nav-text">完整代码</span></a></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright">&copy; <span itemprop="copyrightYear">2018</span>
  <span class="with-love">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">Ling Hu</span>

  
</div>









        







        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

    

  </div>

  

<script type="text/javascript">
  // When window.Promise does not stringify as '[object Function]', overwrite it with null.
  // NOTE(review): presumably so later scripts treat Promise as unavailable — confirm.
  (function (root) {
    var looksLikeFunction = Object.prototype.toString.call(root.Promise) === '[object Function]';
    if (!looksLikeFunction) {
      root.Promise = null;
    }
  })(window);
</script>









  












  
  
    <script type="text/javascript" src="/jexo/lib/jquery/index.js?v=2.1.3"></script>
  

  
  
    <script type="text/javascript" src="/jexo/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
  

  
  
    <script type="text/javascript" src="/jexo/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
  

  
  
    <script type="text/javascript" src="/jexo/lib/velocity/velocity.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/jexo/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/jexo/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>
  


  


  <script type="text/javascript" src="/jexo/js/src/utils.js?v=5.1.4"></script>

  <script type="text/javascript" src="/jexo/js/src/motion.js?v=5.1.4"></script>



  
  

  
  <script type="text/javascript" src="/jexo/js/src/scrollspy.js?v=5.1.4"></script>
<script type="text/javascript" src="/jexo/js/src/post-details.js?v=5.1.4"></script>



  


  <script type="text/javascript" src="/jexo/js/src/bootstrap.js?v=5.1.4"></script>



  


  




	





  





  







<!-- LOCAL: You can save these files to your site and update links -->
    
        
        <link rel="stylesheet" href="https://aimingoo.github.io/gitmint/style/default.css">
        <script src="https://aimingoo.github.io/gitmint/dist/gitmint.browser.js"></script>
    
<!-- END LOCAL -->

    
      <style>
        a.gitment-editor-footer-tip { display: none; }
        .gitment-container.gitment-footer-container { display: none; }
      </style>
    

    
      <script type="text/javascript">
      /**
       * Construct a Gitmint instance from this page's settings and call its
       * render() with 'gitment-container'.
       */
      function renderGitment(){
        // NOTE(review): this OAuth client_secret is delivered to every visitor in the page
        // source — confirm that is acceptable, or move the token exchange server-side.
        var oauthSettings = {
            client_secret: '06b042e57b8420f5002a6407348dde078bcde761',
            client_id: '1b5481d6512b2f28d7a2'
        };

        var options = {
            // Thread identifier comes from getId().
            id: getId(),
            owner: 'javalong',
            repo: 'RxJava-Operator',
            // The leading non-empty string is truthy, so this always evaluates to "zh-Hans".
            lang: "zh-Hans" || navigator.language || navigator.systemLanguage || navigator.userLanguage,
            oauth: oauthSettings
        };

        new Gitmint(options).render('gitment-container');
      }

      /**
       * Return window.location.pathname, limited to its first 50 characters
       * when the pathname is 50 characters or longer.
       */
      function getId(){
            var path = window.location.pathname;
            return path.length >= 50 ? path.substr(0, 50) : path;
      }

      
      // Invoke renderGitment() immediately when this inline script executes.
      renderGitment();
      
      </script>
    







  





  

  

  

  
  

  

  

  

</body>
</html>
